# --- Project Root ---
# Every path in this script is relative to the folder that contains
# "JOP_Replication_Materials". Set the JOP_PROJECT_ROOT environment variable to
# run the script from another location; when it is unset the original
# hard-coded folder is used.
# The global workspace is intentionally not cleared: every object this script
# uses is created below, so wiping the caller's session is unnecessary.
project_root <- Sys.getenv("JOP_PROJECT_ROOT", unset = "/Users/John/Dropbox/")
setwd(project_root)

# --- Load Packages ---
library(readxl)
library(readr)
library(dplyr)
library(tidyr)
library(stringr)

# --- Load Main Dataset ---

# `isic` is the key for the join against the zero-padded character codes built
# later in this script, so it has to be read as character. Pin the type rather
# than rely on readr's guess: if the rows sampled for guessing contain no code
# with a leading zero, the column would presumably be parsed as numeric, losing
# leading zeros and making the join fail on mismatched key types.
# All other columns keep readr's default type guessing.
df <- read_csv(
  "JOP_Replication_Materials/data/processed/final_dataset.csv",
  col_types = cols(isic = col_character())
)

# --- Load and Process DV Data ---

# Workbook that holds the three DV sheets read below.
dv_workbook <- "JOP_Replication_Materials/data/raw/dv_clean.xlsx"

# Read one sheet of the DV workbook and stack the columns at the given
# positions into long format: each stacked cell becomes its own row, with the
# cell value stored in `isic` and the source column name in `name` (the
# pivot_longer() default).
read_dv_sheet <- function(sheet, isic_cols) {
  sheet_data <- read_excel(dv_workbook, sheet = sheet)
  pivot_longer(sheet_data, cols = all_of(isic_cols), values_to = "isic")
}

# The column positions differ per sheet.
or <- read_dv_sheet("ownership_restrictions", 7:12)
lcr <- read_dv_sheet("local_content_requirements", 7:13)
gp <- read_dv_sheet("government_procurement", 7:11)

# --- Combine all DV entries ---
combined <- bind_rows(or, lcr, gp)

# --- Compute Totals per ISIC ---
# For every ISIC code, count the distinct `desc_chinese` values recorded
# against it across the three sheets. Result: one row per code with the
# columns `isic` (4-character, zero-padded) and `total_isic`.
combined <- combined %>%
  # Unused ISIC slots come through the pivot as NA. Drop them so they do not
  # form a spurious "NA" group that would later match NA keys in the join.
  filter(!is.na(isic)) %>%
  # Normalise the code BEFORE grouping: padding afterwards would count
  # "111" and "0111" as different codes and leave duplicate keys behind.
  mutate(isic = str_pad(as.character(isic), 4, pad = "0")) %>%
  group_by(isic) %>%
  summarise(total_isic = n_distinct(desc_chinese)) %>%
  ungroup()

# --- Join with Final Dataset ---
# Attach the per-ISIC totals to the main dataset, zero-fill codes that have no
# total, keep only the output columns, and retain the first row per ISIC code.
df2 <- left_join(df, combined, by = "isic") %>%
  mutate(total_isic = replace_na(total_isic, 0)) %>%
  dplyr::select(
    isic, total_isic, strategic, median_share, med_hhi_isic2, med_soe_isic2
  ) %>%
  distinct(isic, .keep_all = TRUE)

# --- Write Output ---
# Save the joined per-ISIC table as CSV.
df2 %>%
  write_csv("JOP_Replication_Materials/data/processed/total_isic.csv")
